package com.platform.wx.spider;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;

import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import jodd.util.StringUtil;
/**
 * Scrapes articles of a WeChat public account through the Sogou WeChat search pages.
 *
 * <p>An instance is bound to one account id. Request parameters for the Sogou
 * {@code gzhjs} endpoint are computed by evaluating the bundled {@code /url.js}
 * script, and pages are downloaded and parsed with jsoup.
 *
 * <p>Instances keep mutable state (cached parameters, cookies, total page count,
 * last fetched model) without any synchronization, so they must not be shared
 * between threads.
 */
public class WechatSpider {

    /** Timeout, in milliseconds, applied to every HTTP request. */
    private static final int REQUEST_TIMEOUT_MS = 10000;

    /** Browser user agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36";

    /** Hard-coded cookie header used while no cookie has been received from a response. */
    private static final String DEFAULT_COOKIE =
            "ABTEST=8|1430710665|v1; SUID=F55370722708930A000000005546E989; PHPSESSID=0hk2d8cl4128niajvb4f4asfq6; SUIR=1430710665; SUID=F55370724FC80D0A000000005546E989; SNUID=D47250532024351871AD39CB21F3D59C; SUV=00EA70CE727053F55546F1207367B700; weixinIndexVisited=1; wuid=AAGjZr7TCQAAAAqUKHWrjwEAkwA=; ld=nAVZ9yllll2qSs4glllllVqpDNtllllltXxFdyllll9lllllxllll5@@@@@@@@@@; usid=pz2gIdtBRiERY8lB; sct=2; wapsogou_qq_nickname=; IPLOC=CN3200";

    /** Prefix of the Sogou endpoint that lists the articles of an account. */
    private static final String GZH_URL_PREFIX = "http://weixin.sogou.com/gzhjs?cb=sogou.weixin.gzhcb&";

    /** Extracts the page count; requires at least one digit so parseInt never sees "". */
    private static final Pattern TOTAL_PAGES_PATTERN = Pattern.compile("totalPages\":([0-9]+)");

    private String id;
    /** Fields of the article most recently loaded by {@link #fetchContent(String)}. */
    protected Map<String, Object> model;
    private int totalpages = 0;
    private String sogouParam = "";
    /** Cookie header built from the last response; {@code null} or blank until a response set cookies. */
    private String cookie = null;

    /**
     * Creates a spider for the given account id.
     *
     * @param id the WeChat account id to scrape
     */
    public WechatSpider(String id) {
        setId(id);
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getVersion() {
        return "1.0";
    }

    public String getId() {
        return this.id;
    }

    /**
     * Returns the cached total page count, parsing it from {@code str} on first use.
     *
     * @param str text expected to contain {@code totalPages":N}; may be {@code null}
     * @return the total page count, or 0 while it is still unknown
     */
    private int getTotalPage(String str) {
        if (0 != totalpages || null == str) {
            return totalpages;
        }
        Matcher matcher = TOTAL_PAGES_PATTERN.matcher(str);
        // When several occurrences exist the last one wins.
        while (matcher.find()) {
            try {
                totalpages = Integer.parseInt(matcher.group(1));
            } catch (NumberFormatException ignored) {
                // Digit run too large for an int: treat it as "not found".
            }
        }
        return totalpages;
    }

    /**
     * Downloads the first page of the account's article list.
     *
     * @return the parsed document, or {@code null} when the request fails
     * @throws Exception if no id has been set
     */
    protected Document getDoc() throws Exception {
        String url = makeUrl();
        try {
            return Jsoup
                    .connect(url)
                    .timeout(REQUEST_TIMEOUT_MS)
                    .ignoreContentType(true)
                    .header("User-Agent", USER_AGENT)
                    .header("Cookie", DEFAULT_COOKIE)
                    .get();
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * Downloads and parses the document at {@code url}.
     *
     * <p>The cookies returned by the response replace the stored cookie header and
     * are sent with the next request; while none are stored the hard-coded default
     * cookie is sent instead.
     *
     * @param url the address to fetch
     * @return the parsed document, or {@code null} when the request fails
     */
    protected Document getDoc(String url) {
        try {
            String cookieHeader = StringUtil.isNotBlank(this.cookie) ? this.cookie : DEFAULT_COOKIE;
            // Configure only; the single request is issued by execute() below.
            Connection conn = Jsoup.connect(url)
                    .ignoreContentType(true)
                    .timeout(REQUEST_TIMEOUT_MS)
                    .header("User-Agent", USER_AGENT)
                    .header("Cookie", cookieHeader)
                    .method(Method.GET);
            Response response = conn.execute();
            this.cookie = formatCookies(response.cookies());
            return Jsoup.parse(response.body());
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Renders a cookie map as a {@code name=value; name=value} request header value. */
    private static String formatCookies(Map<String, String> cookies) {
        StringBuilder header = new StringBuilder();
        for (Map.Entry<String, String> entry : cookies.entrySet()) {
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(entry.getKey()).append('=').append(entry.getValue());
        }
        return header.toString();
    }

    /**
     * Returns the query parameters for the Sogou URL, computing them once by
     * evaluating the bundled {@code /url.js} script.
     *
     * @return the parameter string, or the empty string when it could not be
     *         computed (no JavaScript engine, missing script, or script error)
     */
    public String getSogouParam() {
        if (!"".equals(this.sogouParam)) {
            return this.sogouParam;
        }
        ScriptEngine se = new ScriptEngineManager().getEngineByExtension("js");
        if (null == se) {
            System.out.println("no JavaScript engine available, cannot compute sogou parameters");
            return this.sogouParam;
        }
        // Loading as a stream also works when the script is packaged inside a jar.
        InputStream script = this.getClass().getResourceAsStream("/url.js");
        if (null == script) {
            System.out.println("resource /url.js not found on the classpath");
            return this.sogouParam;
        }
        // Platform default charset, exactly as the former FileReader did.
        try (Reader reader = new InputStreamReader(script)) {
            se.eval(reader);
            se.eval("window.SogouEncrypt.setKv('8d11ae022be','1')");
            // Pass the id as a binding instead of splicing it into script source,
            // so quotes or backslashes in the id cannot alter the script.
            se.put("wechatSpiderId", this.getId());
            Object encrypted = se.eval("window.SogouEncrypt.encryptquery(wechatSpiderId,'sogou')");
            if (null != encrypted) {
                this.sogouParam = encrypted.toString();
            }
        } catch (IOException | ScriptException e) {
            System.out.println(e);
        }
        return this.sogouParam;
    }

    /** Validates that an id is set and returns the list URL without a page parameter. */
    private String baseListUrl() throws Exception {
        if (null == id || "".equals(id)) {
            throw new Exception("must set id first");
        }
        return GZH_URL_PREFIX + this.getSogouParam();
    }

    /**
     * Builds the URL of the first page of the article list.
     *
     * @return the list URL
     * @throws Exception if no id has been set
     */
    protected String makeUrl() throws Exception {
        return baseListUrl();
    }

    /**
     * Builds the URL of the given page of the article list.
     *
     * @param page the page number appended as the {@code page} parameter
     * @return the list URL
     * @throws Exception if no id has been set
     */
    protected String makeUrl(int page) throws Exception {
        return baseListUrl() + "&page=" + page;
    }

    /**
     * Loads the first article of the first list page into {@link #model}.
     *
     * @throws Exception if the list page cannot be loaded or contains no article URL
     */
    protected void excute() throws Exception {
        Document doc = getDoc();
        if (null == doc) {
            throw new Exception("unknown error");
        }

        Element topicUrl = doc.select("url").first();
        if (null == topicUrl) {
            throw new Exception(
                    "make sure the openId is right, otherwise no topcs in this wechat account");
        }
        // Drop the nested title1 element so that only the URL text remains.
        topicUrl.select("title1").remove();
        fetchContent(topicUrl.text());
    }

    /**
     * Downloads the article at {@code url} and stores its fields in {@link #model}.
     * The model is left untouched when the download fails.
     *
     * @param url the article address
     */
    protected void fetchContent(String url) {
        Document doc = getDoc(url);
        if (null == doc) {
            return;
        }
        model = extractTopic(doc, url);
    }

    /**
     * Downloads the article at {@code url} and returns its fields.
     *
     * @param url the article address
     * @return a map with the keys {@code content}, {@code images}, {@code url},
     *         {@code title}, {@code date} and {@code user}, or {@code null} when
     *         the download fails
     */
    public Map<String, Object> getTopicByUrl(String url) {
        Document doc = getDoc(url);
        if (null == doc) {
            return null;
        }
        return extractTopic(doc, url);
    }

    /**
     * Collects the article fields from a parsed article page. Elements that are
     * missing from the page yield empty strings instead of failing.
     */
    private static Map<String, Object> extractTopic(Document doc, String url) {
        List<String> images = new ArrayList<String>();
        for (Element img : doc.select("#js_content img[data-src]")) {
            images.add(img.attr("data-src"));
        }
        Element contentDom = doc.select("#js_content").first();

        Map<String, Object> topic = new HashMap<String, Object>();
        topic.put("content", null == contentDom ? "" : contentDom.html());
        topic.put("images", images);
        topic.put("url", url);
        topic.put("title", firstText(doc, "#activity-name"));
        topic.put("date", firstText(doc, "#post-date"));
        topic.put("user", firstText(doc, "#post-user"));
        return topic;
    }

    /** Returns the text of the first element matching {@code cssQuery}, or "" if none matches. */
    private static String firstText(Document doc, String cssQuery) {
        Element element = doc.select(cssQuery).first();
        return null == element ? "" : element.text();
    }

    /**
     * Extracts the article URLs embedded in the {@code msgList} script variable
     * of an account's article list page.
     *
     * @param url the address of the list page; may be {@code null} or empty
     * @return the absolute article URLs; empty when {@code url} is blank, the page
     *         cannot be loaded, or it contains no {@code msgList} data
     */
    protected List<String> getTopicUrls(String url) {
        List<String> result = new ArrayList<String>();
        if (null == url || url.equals("")) {
            return result;
        }
        Document doc = getDoc(url);
        if (null == doc) {
            return result;
        }
        String[] afterMarker = doc.html().split("var msgList = ");
        if (afterMarker.length < 2) {
            return result;
        }
        String[] beforeScript = afterMarker[1].split("seajs.use");
        String jsonStr = beforeScript.length == 0 ? "" : beforeScript[0].trim();
        for (String piece : jsonStr.split("content_url\":\"")) {
            if (piece.startsWith("/s")) {
                // Cut at the next JSON field and turn "&amp;" back into "&".
                result.add("http://mp.weixin.qq.com"
                        + piece.split("\",\"copyright_stat")[0].replace("amp;", ""));
            }
        }
        return result;
    }

    /**
     * Returns the address of the account's article list page, taken from the
     * account link on the Sogou search result page. The equivalent jQuery lookup is
     * {@code $("[name=em_weixinhao]:contains(id)").parents(".txt-box").find(".tit a").attr("href")}.
     *
     * @return the absolute list URL, or {@code null} when the search page cannot be loaded
     */
    public String getListUrl() {
        Document doc = getSogouSearch();
        if (null == doc) {
            return null;
        }
        return "http://weixin.sogou.com"
                + doc.select("[name=em_weixinhao]:contains(" + this.id + ")").parents().select(".tit a").attr("href");
    }

    /**
     * Returns the title of the account's most recent article as shown on the
     * Sogou search result page.
     *
     * @return a map whose {@code title} entry holds the title, or the empty string
     *         when the search page cannot be loaded or shows no article
     */
    public Map<String, String> getLastArticle() {
        Map<String, String> article = new HashMap<String, String>();
        Document doc = getSogouSearch();
        String title = null == doc
                ? ""
                : doc.select("dt:contains(最近文章)").parents().select("dd a").text();
        article.put("title", title);
        return article;
    }

    /**
     * Downloads the Sogou account search result page for this spider's id.
     *
     * @return the parsed page, or {@code null} when the request fails
     */
    public Document getSogouSearch() {
        String query = String.valueOf(this.id);
        try {
            // The URL declares ie=utf8, so the query is percent-encoded as UTF-8.
            query = URLEncoder.encode(query, "UTF-8");
        } catch (UnsupportedEncodingException ignored) {
            // UTF-8 is supported by every JVM; keep the raw id if that ever fails.
        }
        return getDoc("http://weixin.sogou.com/weixin?type=1&ie=utf8&query=" + query);
    }

    /**
     * Downloads every article listed on the given page of the account's article list.
     *
     * @param page the page number to load
     * @return the parsed article documents; empty when {@code page} lies beyond
     *         the known total page count
     * @throws Exception if no id is set, the list page cannot be loaded, or it
     *         contains no article URL
     */
    public List<Document> getPageDocuments(int page) throws Exception {
        List<Document> docs = new ArrayList<Document>();
        // Known to be out of range: skip the request altogether.
        if (0 != totalpages && page > totalpages) {
            return docs;
        }

        String url = makeUrl(page);
        Document doc = getDoc(url);
        if (null == doc) {
            throw new Exception("unknown error");
        }

        // Page count still unknown: try to learn it from this response.
        if (0 == totalpages) {
            Element pageSize = doc.select("pagesize").last();
            if (null != pageSize) {
                getTotalPage(pageSize.html());
            }
            if (0 != totalpages && page > totalpages) {
                return docs;
            }
        }

        ListIterator<Element> topicUrls = doc.select("url").listIterator();
        if (!topicUrls.hasNext()) {
            throw new Exception(
                    "make sure the openId is right, otherwise no topics in this wechat account");
        }

        while (topicUrls.hasNext()) {
            Element topicUrl = topicUrls.next();
            topicUrl.select("title1").remove();
            Document topicDoc = getDoc(topicUrl.text());
            if (null != topicDoc) {
                // Remember which list page the article was found on.
                topicDoc.attr("originUrl", url);
                docs.add(topicDoc);
            }
        }
        return docs;
    }

    /**
     * Manual smoke test: looks up the sample account on Sogou, prints the link of
     * its latest article and then prints the page behind that link.
     *
     * <p>On the search page the latest article is rendered as
     * {@code <dl><dt>最近文章：</dt><dd><a href="/link?url=...">title</a></dd></dl>}.
     */
    public static void main(String[] args) {
        WechatSpider spider = new WechatSpider("xjytzrx");

        Document doc = spider.getSogouSearch();
        if (null == doc) {
            System.out.println("failed to load the sogou search page");
            return;
        }
        Elements elements = doc.select("dt:contains(最近文章)").parents().select("dd a");
        String url = "http://weixin.sogou.com" + elements.attr("href");
        System.out.println(url);

        // Pause between the two requests.
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop instead of carrying on.
            Thread.currentThread().interrupt();
            return;
        }

        doc = spider.getDoc(url);
        if (null == doc) {
            System.out.println("failed to load " + url);
            return;
        }
        System.out.println(doc.toString());
    }

}
